# Import the data set; the file is semicolon-separated, has a header row
# and uses a decimal comma.
podatki <- read.table(
  file = "/cloud/project/Poglavje 6/Naloga 2/Avtomobili.csv",
  header = TRUE,
  sep = ";",
  dec = ","
)
# Show the first rows as a quick check of the import.
head(podatki)
## ID Ccm Km Vrtljajimax Dolžina Teža Pospešek Hitrost Poraba_90
## 1 1 1995 146 5800 455 1380 9.9 210 6.5
## 2 2 1969 150 6200 429 1370 8.4 210 6.2
## 3 3 1969 150 6200 444 1300 9.0 210 6.2
## 4 4 1969 150 6200 429 1370 8.4 215 6.2
## 5 5 1969 150 6200 410 1240 8.4 210 6.2
## 6 6 1747 140 6300 445 1270 10.0 205 6.1
## Poraba_120 Motor Pogon Cena
## 1 8.3 2 1 33263
## 2 7.8 2 1 37387
## 3 8.0 2 1 25453
## 4 7.8 2 1 35994
## 5 8.0 2 1 19061
## 6 8.2 2 1 20215
Opis spremenljivk:
# Labelled factor copies of the numeric category codes; the original
# Motor and Pogon columns are kept unchanged.
podatki[["MotorF"]] <- factor(
  x = podatki[["Motor"]],
  levels = c(1, 2),
  labels = c("dizelski", "bencinski")
)
podatki[["PogonF"]] <- factor(
  x = podatki[["Pogon"]],
  levels = c(1, 2, 3),
  labels = c("spredaj", "zadaj", "4x4")
)
library(pastecs)
##
## Attaching package: 'pastecs'
## The following object is masked from 'package:magrittr':
##
## extract
## The following objects are masked from 'package:dplyr':
##
## first, last
# Descriptive statistics (without the basic counts/range block) for
# columns 2 to 13, rounded to two decimals.
# NOTE: Motor and Pogon fall inside this range but are numeric category
# codes (see MotorF / PogonF), so their mean and variance are not meaningful.
round(
  stat.desc(podatki[, 2:13], basic = FALSE),
  digits = 2
)
## Ccm Km Vrtljajimax Dolžina Teža Pospešek
## median 1984.00 118.00 5500.00 443.00 1250.00 11.10
## mean 2155.22 133.16 5428.28 435.24 1287.95 11.56
## SE.mean 40.10 3.11 33.40 1.84 14.31 0.15
## CI.mean.0.95 78.80 6.10 65.63 3.62 28.12 0.29
## var 747772.93 4483.65 518632.81 1575.66 95248.47 9.85
## std.dev 864.74 66.96 720.16 39.69 308.62 3.14
## coef.var 0.40 0.50 0.13 0.09 0.24 0.27
## Hitrost Poraba_90 Poraba_120 Motor Pogon Cena
## median 190.00 6.30 9.00 2.00 1.00 26991.00
## mean 192.66 6.72 10.29 1.78 1.51 34263.33
## SE.mean 1.43 0.08 0.19 0.02 0.03 1307.38
## CI.mean.0.95 2.81 0.16 0.37 0.04 0.07 2569.12
## var 949.74 3.27 16.05 0.17 0.57 794796140.98
## std.dev 30.82 1.81 4.01 0.41 0.75 28192.13
## coef.var 0.16 0.27 0.39 0.23 0.50 0.82
# Per-column summary: quartiles and mean for numeric columns, level counts
# for the factor columns MotorF and PogonF.
summary(podatki)
## ID Ccm Km Vrtljajimax
## Min. : 1 Min. : 796 Min. : 39.0 Min. :3400
## 1st Qu.:117 1st Qu.:1598 1st Qu.: 90.0 1st Qu.:5000
## Median :233 Median :1984 Median :118.0 Median :5500
## Mean :233 Mean :2155 Mean :133.2 Mean :5428
## 3rd Qu.:349 3rd Qu.:2461 3rd Qu.:150.0 3rd Qu.:6000
## Max. :465 Max. :5987 Max. :485.0 Max. :8250
## Dolžina Teža Pospešek Hitrost
## Min. :330.0 Min. : 620 Min. : 4.40 Min. :125.0
## 1st Qu.:404.0 1st Qu.:1065 1st Qu.: 9.40 1st Qu.:170.0
## Median :443.0 Median :1250 Median :11.10 Median :190.0
## Mean :435.2 Mean :1288 Mean :11.56 Mean :192.7
## 3rd Qu.:467.0 3rd Qu.:1460 3rd Qu.:13.30 3rd Qu.:210.0
## Max. :511.0 Max. :2200 Max. :25.00 Max. :320.0
## Poraba_90 Poraba_120 Motor Pogon
## Min. : 3.600 Min. : 4.90 Min. :1.000 Min. :1.000
## 1st Qu.: 5.400 1st Qu.: 7.40 1st Qu.:2.000 1st Qu.:1.000
## Median : 6.300 Median : 9.00 Median :2.000 Median :1.000
## Mean : 6.723 Mean :10.29 Mean :1.781 Mean :1.508
## 3rd Qu.: 7.600 3rd Qu.:12.30 3rd Qu.:2.000 3rd Qu.:2.000
## Max. :15.500 Max. :35.60 Max. :2.000 Max. :3.000
## Cena MotorF PogonF
## Min. : 8155 dizelski :102 spredaj:303
## 1st Qu.: 17794 bencinski:363 zadaj : 88
## Median : 26991 4x4 : 74
## Mean : 34263
## 3rd Qu.: 39314
## Max. :240320
# Welch two-sample t-test (unequal variances, two-sided): compares mean
# Poraba_120 between the two Motor groups.
# `paired` is deliberately not passed: independent samples are the default,
# and the formula method of t.test() rejects a `paired` argument in R >= 4.4.
t.test(podatki$Poraba_120 ~ podatki$Motor,
       alternative = "two.sided",
       var.equal = FALSE)
##
## Welch Two Sample t-test
##
## data: podatki$Poraba_120 by podatki$Motor
## t = -3.3364, df = 192.95, p-value = 0.001018
## alternative hypothesis: true difference in means between group 1 and group 2 is not equal to 0
## 95 percent confidence interval:
## -2.1199546 -0.5447189
## sample estimates:
## mean in group 1 mean in group 2
## 9.251961 10.584298
# Linear regression of Pospešek on Teža, Km, PogonF and MotorF, including
# the Km-by-MotorF interaction; the coefficient table is printed below.
fit <- lm(
  formula = Pospešek ~ Teža + Km + PogonF + MotorF + Km:MotorF,
  data = podatki
)
summary(fit)
##
## Call:
## lm(formula = Pospešek ~ Teža + Km + PogonF + MotorF + Km:MotorF,
## data = podatki)
##
## Residuals:
## Min 1Q Median 3Q Max
## -3.2557 -1.1652 -0.3618 0.6809 8.2220
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 17.4965680 0.6049569 28.922 < 2e-16 ***
## Teža 0.0025855 0.0004173 6.195 1.30e-09 ***
## Km -0.0690078 0.0054289 -12.711 < 2e-16 ***
## PogonFzadaj 0.8896774 0.2627070 3.387 0.000769 ***
## PogonF4x4 0.9718422 0.2508565 3.874 0.000123 ***
## MotorFbencinski -4.5085332 0.5578018 -8.083 5.69e-15 ***
## Km:MotorFbencinski 0.0281638 0.0050838 5.540 5.11e-08 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 1.76 on 458 degrees of freedom
## Multiple R-squared: 0.6896, Adjusted R-squared: 0.6856
## F-statistic: 169.6 on 6 and 458 DF, p-value: < 2.2e-16
# Standardise the eight clustering variables (columns 2 to 9) to z-scores.
podatki_clu_std <- as.data.frame(scale(podatki[c(2:9)]))
# Dissimilarity of each unit = Euclidean norm of its z-score vector, i.e.
# its distance from the centroid of the standardised data. It is computed
# over all standardised columns at once, before the new column is added.
podatki_clu_std$Različnost <- sqrt(unname(rowSums(podatki_clu_std^2)))
# List the units with the largest dissimilarity (candidate outliers).
head(podatki_clu_std[order(podatki_clu_std$Različnost, decreasing = TRUE), ])
## Ccm Km Vrtljajimax Dolžina Teža Pospešek
## 107 3.837900 5.254518 2.18245503 0.4978333 1.302704 -2.280301
## 105 3.837900 4.612344 1.14102219 0.9512956 1.302704 -2.025343
## 209 4.431143 3.895499 -0.31698379 1.9086049 2.647385 -1.579165
## 208 4.431143 3.895499 -0.31698379 1.8078355 2.841796 -1.579165
## 210 4.431143 3.895499 -0.31698379 0.3718716 1.999346 -1.451686
## 215 4.405702 3.701354 0.09958935 0.3718716 1.837336 -1.897863
## Hitrost Poraba_90 Različnost
## 107 4.131863 4.851123 9.739244
## 105 3.807377 2.695662 8.109985
## 209 1.860458 3.193076 7.855740
## 208 1.860458 1.811371 7.449204
## 210 1.860458 2.308785 7.067198
## 215 1.860458 2.308785 7.001994
# Show the original (unstandardised) rows of the four units with the
# largest dissimilarity found above.
print(podatki[c(107, 105, 209, 208), ])
## ID Ccm Km Vrtljajimax Dolžina Teža Pospešek Hitrost Poraba_90
## 107 107 5474 485 7000 455 1690 4.4 320 15.5
## 105 105 5474 442 6250 473 1690 5.2 310 11.6
## 209 209 5987 394 5200 511 2105 6.6 250 12.5
## 208 208 5987 394 5200 507 2165 6.6 250 10.0
## Poraba_120 Motor Pogon Cena MotorF PogonF
## 107 35.6 2 2 213644 bencinski zadaj
## 105 12.7 2 2 240320 bencinski zadaj
## 209 24.5 2 2 154342 bencinski zadaj
## 208 20.6 2 2 169724 bencinski zadaj
# Drop the four outlying units. They are selected by ID rather than by row
# position, so re-running this step (or reordering the rows beforehand)
# cannot remove the wrong units.
podatki <- podatki[!(podatki$ID %in% c(105, 107, 208, 209)), ]
# Re-standardise the clustering variables on the reduced data set.
podatki_clu_std <- as.data.frame(scale(podatki[c(2:9)]))
head(podatki_clu_std, 4)
## Ccm Km Vrtljajimax Dolžina Teža Pospešek
## 1 -0.1612971 0.2518182 0.523296 0.5113345 0.32094457 -0.5499112
## 2 -0.1937569 0.3171924 1.080110 -0.1466210 0.28801509 -1.0334610
## 3 -0.1937569 0.3171924 1.080110 0.2329687 0.05750873 -0.8400410
## 4 -0.1937569 0.3171924 1.080110 -0.1466210 0.28801509 -1.0334610
## Hitrost Poraba_90
## 1 0.6113771 -0.1003219
## 2 0.6113771 -0.2739703
## 3 0.6113771 -0.2739703
## 4 0.7801263 -0.2739703
library(factoextra)
# Hopkins statistic for the standardised data, using n = (number of units - 1)
# sampled points; the accompanying plot is suppressed.
get_clust_tendency(
  data = podatki_clu_std,
  n = nrow(podatki_clu_std) - 1,
  graph = FALSE
)
## $hopkins_stat
## [1] 0.8540707
##
## $plot
## NULL
library(dplyr)
# Hierarchical clustering: Euclidean distances between standardised units,
# agglomerated with Ward's method (ward.D2). The pipe form is kept as is
# because the call is stored in the object and shown when it is printed.
WARD <- podatki_clu_std %>%
get_dist(method = "euclidean") %>%
hclust(method = "ward.D2")
WARD
##
## Call:
## hclust(d = ., method = "ward.D2")
##
## Cluster method : ward.D2
## Distance : euclidean
## Number of objects: 461
library(factoextra)
# Draw the dendrogram of the Ward clustering.
fviz_dend(WARD)
## Warning: `guides(<scale> = FALSE)` is deprecated. Please use
## `guides(<scale> = "none")` instead.
# Fixed seed, presumably so that the NbClust run below is repeatable.
set.seed(1)
library(NbClust)
# Compute all available indices for 2 to 6 clusters, using the same
# distance and linkage as the Ward clustering above.
Indeksi <- NbClust(
  data = podatki_clu_std,
  distance = "euclidean",
  min.nc = 2,
  max.nc = 6,
  method = "ward.D2",
  index = "all"
)
## *** : The Hubert index is a graphical method of determining the number of clusters.
## In the plot of Hubert index, we seek a significant knee that corresponds to a
## significant increase of the value of the measure i.e the significant peak in Hubert
## index second differences plot.
##
## *** : The D index is a graphical method of determining the number of clusters.
## In the plot of D index, we seek a significant knee (the significant peak in Dindex
## second differences plot) that corresponds to a significant increase of the value of
## the measure.
##
## *******************************************************************
## * Among all indices:
## * 2 proposed 2 as the best number of clusters
## * 4 proposed 3 as the best number of clusters
## * 10 proposed 4 as the best number of clusters
## * 3 proposed 5 as the best number of clusters
## * 1 proposed 6 as the best number of clusters
##
## ***** Conclusion *****
##
## * According to the majority rule, the best number of clusters is 4
##
##
## *******************************************************************
library(factoextra)
# Plot the NbClust result; the printed tally below repeats how many
# indices proposed each number of clusters.
fviz_nbclust(Indeksi,
ggtheme = theme_linedraw())
## Warning in if (class(best_nc) == "numeric") print(best_nc) else if
## (class(best_nc) == : the condition has length > 1 and only the first
## element will be used
## Warning in if (class(best_nc) == "matrix") .viz_NbClust(x,
## print.summary, : the condition has length > 1 and only the first
## element will be used
## Warning in if (class(best_nc) == "numeric") print(best_nc) else if
## (class(best_nc) == : the condition has length > 1 and only the first
## element will be used
## Warning in if (class(best_nc) == "matrix") {: the condition has length
## > 1 and only the first element will be used
## Among all indices:
## ===================
## * 2 proposed 0 as the best number of clusters
## * 1 proposed 1 as the best number of clusters
## * 2 proposed 2 as the best number of clusters
## * 4 proposed 3 as the best number of clusters
## * 10 proposed 4 as the best number of clusters
## * 3 proposed 5 as the best number of clusters
## * 1 proposed 6 as the best number of clusters
## * 3 proposed NA's as the best number of clusters
##
## Conclusion
## =========================
## * According to the majority rule, the best number of clusters is 4 .
# Cut the Ward tree into four groups and store each unit's group number
# alongside the original data.
podatki[["RazvrstitevWARD"]] <- cutree(tree = WARD, k = 4)
head(podatki)
## ID Ccm Km Vrtljajimax Dolžina Teža Pospešek Hitrost Poraba_90
## 1 1 1995 146 5800 455 1380 9.9 210 6.5
## 2 2 1969 150 6200 429 1370 8.4 210 6.2
## 3 3 1969 150 6200 444 1300 9.0 210 6.2
## 4 4 1969 150 6200 429 1370 8.4 215 6.2
## 5 5 1969 150 6200 410 1240 8.4 210 6.2
## 6 6 1747 140 6300 445 1270 10.0 205 6.1
## Poraba_120 Motor Pogon Cena MotorF PogonF RazvrstitevWARD
## 1 8.3 2 1 33263 bencinski spredaj 1
## 2 7.8 2 1 37387 bencinski spredaj 1
## 3 8.0 2 1 25453 bencinski spredaj 1
## 4 7.8 2 1 35994 bencinski spredaj 1
## 5 8.0 2 1 19061 bencinski spredaj 1
## 6 8.2 2 1 20215 bencinski spredaj 1
library(factoextra)
# Hierarchical k-means with four clusters, using Euclidean distance and
# Ward linkage (ward.D2) for the hierarchical step.
MetodaVod <- hkmeans(
  x = podatki_clu_std,
  k = 4,
  hc.metric = "euclidean",
  hc.method = "ward.D2"
)
# Print cluster sizes, centres, memberships and within-cluster sums of squares.
MetodaVod
## Hierarchical K-means clustering with 4 clusters of sizes 194, 144, 58, 65
##
## Cluster means:
## Ccm Km Vrtljajimax Dolžina Teža
## 1 -0.1618055 0.07397687 0.5032048 0.2908855 -0.05311056
## 2 -0.7547205 -0.87724839 -0.2756734 -1.0166897 -0.97425877
## 3 1.7599239 2.01128548 0.4776949 0.9904028 1.17569166
## 4 0.5845297 -0.07203541 -1.3174010 0.5004334 1.26779376
## Pospešek Hitrost Poraba_90
## 1 -0.4696519 0.3949607 -0.06332458
## 2 0.7697768 -0.8757249 -0.79129802
## 3 -1.2302157 1.6261996 1.36471230
## 4 0.7941093 -0.6898087 0.72428571
##
## Clustering vector:
## 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17
## 1 1 1 1 1 1 1 1 1 1 2 4 2 1 4 1 1
## 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34
## 3 1 1 3 1 1 1 1 1 1 1 1 3 3 1 3 3
## 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51
## 1 1 2 1 1 3 3 3 3 1 1 1 2 1 1 1 3
## 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68
## 3 3 3 3 1 1 1 1 1 3 4 4 4 4 4 4 4
## 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85
## 1 1 1 1 4 2 2 4 4 1 2 2 2 2 2 2 2
## 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102
## 1 1 1 1 1 1 2 3 2 2 1 2 1 2 2 2 2
## 103 104 106 108 109 110 111 112 113 114 115 116 117 118 119 120 121
## 2 2 3 3 2 2 2 2 2 1 1 2 2 1 2 1 1
## 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138
## 2 2 2 1 1 1 1 4 2 2 2 2 2 1 2 1 3
## 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155
## 3 1 1 1 1 2 1 1 1 1 2 2 1 1 1 1 1
## 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172
## 4 4 3 4 4 4 4 4 4 2 2 2 1 1 1 2 2
## 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189
## 2 2 1 2 1 3 1 4 4 2 3 3 4 4 2 2 1
## 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206
## 2 1 1 1 1 1 1 1 4 4 4 4 1 2 4 4 4
## 207 210 211 212 213 214 215 216 217 218 219 220 221 222 223 224 225
## 4 3 1 1 1 1 3 1 3 3 3 1 1 1 1 3 3
## 226 227 228 229 230 231 232 233 234 235 236 237 238 239 240 241 242
## 3 3 3 3 3 3 3 3 3 3 4 4 2 4 4 2 2
## 243 244 245 246 247 248 249 250 251 252 253 254 255 256 257 258 259
## 1 2 3 1 1 3 1 1 1 4 4 1 2 2 2 1 2
## 260 261 262 263 264 265 266 267 268 269 270 271 272 273 274 275 276
## 3 1 1 4 1 2 4 2 2 2 4 4 2 1 2 2 1
## 277 278 279 280 281 282 283 284 285 286 287 288 289 290 291 292 293
## 1 1 1 1 1 1 1 1 2 2 2 1 1 3 4 4 4
## 294 295 296 297 298 299 300 301 302 303 304 305 306 307 308 309 310
## 1 1 1 1 1 2 2 1 1 1 1 1 2 2 1 2 3
## 311 312 313 314 315 316 317 318 319 320 321 322 323 324 325 326 327
## 2 1 1 3 2 2 2 1 1 4 4 4 3 2 2 4 2
## 328 329 330 331 332 333 334 335 336 337 338 339 340 341 342 343 344
## 2 2 1 4 1 1 1 1 1 3 2 2 2 1 1 1 1
## 345 346 347 348 349 350 351 352 353 354 355 356 357 358 359 360 361
## 1 1 2 1 1 2 1 1 1 1 1 1 1 1 1 1 1
## 362 363 364 365 366 367 368 369 370 371 372 373 374 375 376 377 378
## 3 4 2 2 2 2 2 2 2 2 2 2 2 2 2 4 1
## 379 380 381 382 383 384 385 386 387 388 389 390 391 392 393 394 395
## 1 4 4 4 4 3 1 1 1 1 2 2 2 2 2 2 2
## 396 397 398 399 400 401 402 403 404 405 406 407 408 409 410 411 412
## 2 2 2 1 2 2 2 2 2 1 1 4 4 2 2 4 1
## 413 414 415 416 417 418 419 420 421 422 423 424 425 426 427 428 429
## 1 2 1 2 2 3 1 1 2 1 3 1 1 3 3 4 4
## 430 431 432 433 434 435 436 437 438 439 440 441 442 443 444 445 446
## 2 2 1 2 2 2 2 2 2 2 2 2 2 2 4 1 2
## 447 448 449 450 451 452 453 454 455 456 457 458 459 460 461 462 463
## 1 1 1 1 1 4 1 2 1 4 2 3 1 1 3 1 1
## 464 465
## 1 2
##
## Within cluster sum of squares by cluster:
## [1] 421.4469 419.0552 300.9513 341.3704
## (between_SS / total_SS = 59.7 %)
##
## Available components:
##
## [1] "cluster" "centers" "totss" "withinss"
## [5] "tot.withinss" "betweenss" "size" "iter"
## [9] "ifault" "data" "hclust"
library(factoextra)
# Plot the four-cluster solution, without label repulsion.
fviz_cluster(
  object = MetodaVod,
  palette = "Dark2",
  repel = FALSE,
  ggtheme = theme_linedraw()
)
# Store the hierarchical k-means membership next to the Ward membership.
podatki[["RazvrstitevVod"]] <- MetodaVod[["cluster"]]
head(podatki)
## ID Ccm Km Vrtljajimax Dolžina Teža Pospešek Hitrost Poraba_90
## 1 1 1995 146 5800 455 1380 9.9 210 6.5
## 2 2 1969 150 6200 429 1370 8.4 210 6.2
## 3 3 1969 150 6200 444 1300 9.0 210 6.2
## 4 4 1969 150 6200 429 1370 8.4 215 6.2
## 5 5 1969 150 6200 410 1240 8.4 210 6.2
## 6 6 1747 140 6300 445 1270 10.0 205 6.1
## Poraba_120 Motor Pogon Cena MotorF PogonF RazvrstitevWARD
## 1 8.3 2 1 33263 bencinski spredaj 1
## 2 7.8 2 1 37387 bencinski spredaj 1
## 3 8.0 2 1 25453 bencinski spredaj 1
## 4 7.8 2 1 35994 bencinski spredaj 1
## 5 8.0 2 1 19061 bencinski spredaj 1
## 6 8.2 2 1 20215 bencinski spredaj 1
## RazvrstitevVod
## 1 1
## 2 1
## 3 1
## 4 1
## 5 1
## 6 1
# Group sizes from the Ward clustering.
table(podatki$RazvrstitevWARD)
##
## 1 2 3 4
## 191 159 56 55
# Group sizes from the hierarchical k-means clustering.
table(podatki$RazvrstitevVod)
##
## 1 2 3 4
## 194 144 58 65
# Cross-tabulation of the two solutions (rows: Ward, columns: hierarchical
# k-means) to see how many units changed group.
table(podatki$RazvrstitevWARD, podatki$RazvrstitevVod)
##
## 1 2 3 4
## 1 175 6 2 8
## 2 15 136 0 8
## 3 1 0 54 1
## 4 3 2 2 48
# Cluster centres (means of the standardised variables) as a data frame,
# with one row per cluster and an explicit cluster id column.
Povprečja <- MetodaVod$centers
Slika <- as.data.frame(Povprečja)
# seq_len() stays correct (empty) even if there are zero rows, unlike 1:nrow().
Slika$id <- seq_len(nrow(Slika))
library(tidyr)
##
## Attaching package: 'tidyr'
## The following object is masked from 'package:pastecs':
##
## extract
## The following object is masked from 'package:magrittr':
##
## extract
# Reshape the centres to long format: one row per (cluster id, variable)
# pair, with columns `name` and `value`.
Slika <- pivot_longer(
  Slika,
  cols = c("Ccm", "Km", "Vrtljajimax", "Dolžina", "Teža", "Pospešek", "Hitrost", "Poraba_90")
)
# Cluster id as a factor for the legend; labels default to the levels.
Slika$Skupina <- factor(Slika$id, levels = c(1, 2, 3, 4))
# Variable name as a factor whose level order fixes the x-axis order.
Slika$ImeF <- factor(
  Slika$name,
  levels = c("Ccm", "Km", "Vrtljajimax", "Dolžina", "Teža", "Pospešek", "Hitrost", "Poraba_90")
)
library(ggplot2)
# Profile plot of the cluster centres: one line per cluster across the
# clustering variables, with a reference line at the overall mean (0).
ggplot(Slika, aes(x = ImeF, y = value)) +
  geom_hline(yintercept = 0) +
  geom_point(aes(shape = Skupina, col = Skupina), size = 3) +
  geom_line(aes(group = id, linetype = Skupina), size = 1) +
  theme_linedraw() +
  labs(x = "Razvrstitvene spremenljivke", y = "Povprečje") +
  ylim(-2, 3.5)
# One-way ANOVA of every clustering variable on cluster membership; with a
# matrix response, summary() prints one ANOVA table per variable.
fit <- aov(
  formula = cbind(Ccm, Km, Vrtljajimax, Dolžina, Teža, Pospešek, Hitrost, Poraba_90) ~
    as.factor(RazvrstitevVod),
  data = podatki
)
summary(fit)
## Response Ccm :
## Df Sum Sq Mean Sq F value Pr(>F)
## as.factor(RazvrstitevVod) 3 185390013 61796671 257.35 < 2.2e-16
## Residuals 457 109739282 240130
##
## as.factor(RazvrstitevVod) ***
## Residuals
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Response Km :
## Df Sum Sq Mean Sq F value Pr(>F)
## as.factor(RazvrstitevVod) 3 1298491 432830 466.92 < 2.2e-16 ***
## Residuals 457 423636 927
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Response Vrtljajimax :
## Df Sum Sq Mean Sq F value Pr(>F)
## as.factor(RazvrstitevVod) 3 96045389 32015130 103.51 < 2.2e-16
## Residuals 457 141342344 309283
##
## as.factor(RazvrstitevVod) ***
## Residuals
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Response Dolžina :
## Df Sum Sq Mean Sq F value Pr(>F)
## as.factor(RazvrstitevVod) 3 372322 124107 163.93 < 2.2e-16 ***
## Residuals 457 345987 757
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Response Teža :
## Df Sum Sq Mean Sq F value Pr(>F)
## as.factor(RazvrstitevVod) 3 29683598 9894533 354.98 < 2.2e-16 ***
## Residuals 457 12738105 27873
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Response Pospešek :
## Df Sum Sq Mean Sq F value Pr(>F)
## as.factor(RazvrstitevVod) 3 2472.0 823.99 192.66 < 2.2e-16 ***
## Residuals 457 1954.5 4.28
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Response Hitrost :
## Df Sum Sq Mean Sq F value Pr(>F)
## as.factor(RazvrstitevVod) 3 285332 95111 366.76 < 2.2e-16 ***
## Residuals 457 118513 259
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Response Poraba_90 :
## Df Sum Sq Mean Sq F value Pr(>F)
## as.factor(RazvrstitevVod) 3 695.63 231.875 156.45 < 2.2e-16 ***
## Residuals 457 677.34 1.482
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
# Mean Cena within each hierarchical k-means cluster (Group.1 = cluster).
aggregate(podatki$Cena,
by = list(podatki$RazvrstitevVod),
FUN = mean)
## Group.1 x
## 1 1 30379.46
## 2 2 15801.22
## 3 3 78875.48
## 4 4 37086.88
# One-way ANOVA: does mean Cena differ between the clusters?
fit <- aov(
  formula = Cena ~ as.factor(RazvrstitevVod),
  data = podatki
)
summary(fit)
## Df Sum Sq Mean Sq F value Pr(>F)
## as.factor(RazvrstitevVod) 3 1.671e+11 5.569e+10 272.4 <2e-16 ***
## Residuals 457 9.342e+10 2.044e+08
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
# Pearson chi-squared test of association between engine type (MotorF)
# and cluster membership.
hi_kvadrat <- chisq.test(
  x = podatki$MotorF,
  y = as.factor(podatki$RazvrstitevVod)
)
hi_kvadrat
##
## Pearson's Chi-squared test
##
## data: podatki$MotorF and as.factor(podatki$RazvrstitevVod)
## X-squared = 155.29, df = 3, p-value < 2.2e-16
# Observed frequencies with row and column totals.
addmargins(hi_kvadrat$observed)
##
## podatki$MotorF 1 2 3 4 Sum
## dizelski 9 44 1 48 102
## bencinski 185 100 57 17 359
## Sum 194 144 58 65 461
# Expected frequencies under independence, with totals.
# NOTE(review): values are rounded before addmargins(), so the margins are
# sums of rounded cells and can be off by 0.01 (e.g. 101.99 instead of 102).
addmargins(round(hi_kvadrat$expected, 2))
##
## podatki$MotorF 1 2 3 4 Sum
## dizelski 42.92 31.86 12.83 14.38 101.99
## bencinski 151.08 112.14 45.17 50.62 359.01
## Sum 194.00 144.00 58.00 65.00 461.00
# Pearson residuals per cell. The component name is spelled out in full
# instead of relying on `$` partial matching of "res" to "residuals".
round(hi_kvadrat$residuals, 2)
##
## podatki$MotorF 1 2 3 4
## dizelski -5.18 2.15 -3.30 8.86
## bencinski 2.76 -1.15 1.76 -4.73
# Pearson chi-squared test of association between drive type (PogonF)
# and cluster membership; overwrites the previous test object.
hi_kvadrat <- chisq.test(
  x = podatki$PogonF,
  y = as.factor(podatki$RazvrstitevVod)
)
hi_kvadrat
##
## Pearson's Chi-squared test
##
## data: podatki$PogonF and as.factor(podatki$RazvrstitevVod)
## X-squared = 189.9, df = 6, p-value < 2.2e-16
# Observed frequencies with row and column totals.
addmargins(hi_kvadrat$observed)
##
## podatki$PogonF 1 2 3 4 Sum
## spredaj 145 127 10 21 303
## zadaj 30 4 38 12 84
## 4x4 19 13 10 32 74
## Sum 194 144 58 65 461
# Expected frequencies under independence, with totals.
# NOTE(review): values are rounded before addmargins(), so the margins are
# sums of rounded cells and can be off by 0.01 (e.g. 460.99 instead of 461).
addmargins(round(hi_kvadrat$expected, 2))
##
## podatki$PogonF 1 2 3 4 Sum
## spredaj 127.51 94.65 38.12 42.72 303.00
## zadaj 35.35 26.24 10.57 11.84 84.00
## 4x4 31.14 23.11 9.31 10.43 73.99
## Sum 194.00 144.00 58.00 64.99 460.99
# Pearson residuals per cell. The component name is spelled out in full
# instead of relying on `$` partial matching of "res" to "residuals".
round(hi_kvadrat$residuals, 2)
##
## podatki$PogonF 1 2 3 4
## spredaj 1.55 3.33 -4.55 -3.32
## zadaj -0.90 -4.34 8.44 0.05
## 4x4 -2.18 -2.10 0.23 6.68
library(DescTools)
##
## Attaching package: 'DescTools'
## The following objects are masked from 'package:Hmisc':
##
## %nin%, Label, Mean, Quantile
# Cramér's V: strength of association between engine type and cluster.
CramerV(podatki$MotorF, as.factor(podatki$RazvrstitevVod))
## [1] 0.580393
# Cramér's V: strength of association between drive type and cluster.
CramerV(podatki$PogonF, as.factor(podatki$RazvrstitevVod))
## [1] 0.4538315
```